import os
import sys

def process_fastq(filename, output_directory):
    """
    Deduplicate the reads of one FASTQ file by header line.

    The input is read as consecutive four-line records (header, sequence,
    separator, quality), each line stripped of surrounding whitespace.
    Records are keyed by the full header line; when a header occurs more than
    once, only the record with the strictly longest sequence is kept (the
    first one seen wins ties). Surviving records are written, in the order
    their headers first appeared, to a file with the same base name as
    ``filename`` inside ``output_directory``.

    A blank line where a header is expected is skipped instead of being
    treated as end of input, so a stray empty line between records does not
    drop the records that follow it.

    Args:
        filename: Path of the FASTQ file to read.
        output_directory: Directory that receives the result file. It is not
            created here and must already exist.

    Note:
        Records are not validated. A final record cut short by end of file is
        stored with empty strings for its missing lines.
    """
    reads = {}

    with open(filename, 'r') as file:
        while True:
            raw_header = file.readline()
            # readline() returns '' only at true end of file; a blank line
            # still carries its newline character.
            if not raw_header:
                break

            identifier = raw_header.strip()
            if not identifier:
                # Blank line between records: ignore it and look for the
                # next header.
                continue

            sequence = file.readline().strip()
            plus_line = file.readline().strip()
            quality = file.readline().strip()

            # Keep the first record for a header unless a later one has a
            # strictly longer sequence.
            kept = reads.get(identifier)
            if kept is None or len(sequence) > len(kept[0]):
                reads[identifier] = (sequence, plus_line, quality)

    # The output keeps the input's base name, placed in the output directory.
    output_filename = os.path.join(output_directory, os.path.basename(filename))

    with open(output_filename, 'w') as out_file:
        for identifier, (sequence, plus_line, quality) in reads.items():
            out_file.write(f"{identifier}\n{sequence}\n{plus_line}\n{quality}\n")

def process_all_fastq_files(input_directory, output_directory):
    """
    Process every ``.fastq`` file found under ``input_directory``.

    The input directory is walked recursively with ``os.walk`` and each file
    whose name ends in ``.fastq`` is passed to ``process_fastq`` together
    with ``output_directory``. The output directory is created first if it
    does not already exist.

    Args:
        input_directory: Root of the directory tree to search.
        output_directory: Directory that receives the processed files.

    Note:
        process_fastq names each output after the input's base name, so two
        inputs with the same file name in different subdirectories are
        written to the same output path and the later one overwrites the
        earlier one.
    """
    # exist_ok=True creates the directory only when needed and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_directory, exist_ok=True)

    for root, _dirs, files in os.walk(input_directory):
        for name in files:
            if name.endswith('.fastq'):
                process_fastq(os.path.join(root, name), output_directory)

if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments are
    # expected after the script name (input directory, output directory).
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python script_name.py <input_directory_path> <output_directory_path>")
        sys.exit(1)

    input_directory_path, output_directory_path = cli_args

    # Process every FASTQ file under the input directory.
    process_all_fastq_files(input_directory_path, output_directory_path)
